library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x stringr::boundary() masks strucchange::boundary()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()
## x dplyr::select()     masks MASS::select()
library(ggthemes)
library(ggplot2)

Granger causality analysis of all data data.

Theory

Let’s see if we can get CMC from it?

VOIs <- c('Action', 
          'LTM', 
          'Perception', 
          'Procedural', 
          'WM')

TASKS <- c('Gambling',
           'Relational',
           'Social',
           'WM',
           'Language',
           'Emotion')

#S <- length(grep("sub-", dir())) # Num of subjects
R <- length(VOIs) # Num ROIs
#SP <- matrix(rep(0, R*R*S), nrow =  S )

GC analysis at the individual level

First, we need to perform GC analysis on all data.

# np <- c()
df <- NULL

for (task in TASKS) {
  for (sub in dir(task)[grep("sub-", dir(task))]) {
    M <-read_tsv(paste(task, sub, "cmc.txt", sep="/"), 
                 col_names = VOIs, 
                 col_types = cols(
                   Action = col_double(),
                   LTM = col_double(),
                   Perception = col_double(),
                   Procedural = col_double(),
                   WM = col_double()
                 ))
    
    # Select the best lag, using BIC or "Schwartz Criterion"
    
    lag <- VARselect(M)$selection[3]
    
    gm <- VAR(M, type="none", p = lag)
    
    coef <- coefficients(gm)
    
    for (v in 1:R) {
      voi <- VOIs[v]
      
      Estimates <- coef[[v]][,1][1 : (R * lag)]  # estimates
      Pvalues <- coef[[v]][,4][1 : (R * lag)]  # p-values
      
      subdf <- data.frame(Task = rep(task, R * lag),
                          Subject = rep(sub, R*lag),
                          To = rep(voi, R*lag),
                          From = rep(VOIs, lag),
                          Lag = sort(rep(1:lag, R)),
                          Estimate = Estimates,
                          p = Pvalues)
      if (is.null(df)) {
        df <- subdf
      } else {
        df <- rbind(df, subdf)
      }
    }
  }
}
  
granger_data_complete <- as_tibble(df)

Now, because we have different Lags, we are going to pick the smallest p value for each region across all lags

granger_data <- granger_data_complete %>%
  group_by(Task, Subject, To, From) %>%
  summarise(p = min(p),
            Estimate = mean(Estimate)) %>%
  mutate(Link = if_else(p < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'Subject', 'To'. You can override using the `.groups` argument.

Visualize Some Data

Here is an example of the four timeseries

data <- M %>%
  add_column(Time = 1:nrow(M)) %>%
  pivot_longer(cols = c(Action, LTM, Perception, Procedural, WM),
               names_to = "Region",
               values_to = "BOLD")

ggplot(data, aes(x=Time, y=BOLD, col=Region)) +
  geom_line() +
  scale_color_brewer(palette="Set2") +
  facet_wrap(~ Region) +
  theme_pander()

#rownames(P) <- VOIs
#colnames(P) <- VOIs

And here are examples of the p values associated to each connection, in each participant, for each task.

for (task in TASKS) {
  p <- ggplot(filter(granger_data, 
                Task == task),
         aes(x=From, y=To)) +
    geom_tile(aes(fill = p), col="white") +
    scale_fill_viridis_c(option="inferno") +
    ggtitle("Probability of Connection") +
    coord_equal(ratio = 1) +
    facet_wrap(~ Subject) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggtitle(task) +
    theme_pander() 
  print(p)
}

# ggplot(granger_data, aes(x=From, y=To)) +
#   geom_tile(aes(fill=Link), col="white") +
#   scale_fill_viridis_c(option="inferno", end = 0.8) +
#   ggtitle("Inferred Architecture") +
#   facet_wrap(Subject ~ Task ) +
#   coord_equal(ratio = 1) +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#   theme_pander() 

Let’s look at the distribution of lags:

lags <- granger_data_complete %>%
  group_by(Task, Subject) %>%
  summarise(Lag = max(Lag))
## `summarise()` has grouped output by 'Task'. You can override using the `.groups` argument.
ggplot(lags, aes(x=Lag, fill=Task)) +
  geom_histogram(col="white", 
                 binwidth = 1, 
                 position = "dodge",
                 alpha=0.7) +
  theme_pander()

Inferring Architectures

To infer an architecture from a distribution of p-values, we need a few statistical functions. Here are a few that will be computed.

Mode <- function(codes) {
  which.max(tabulate(codes))
}

fisher.chi <- function(pvals) {
    logsum <- -2 * sum(log(pvals))
    1 - pchisq(logsum, df = (2 * length(pvals)))
}

friston.test <- function(pvals) {
    max(pvals) ** length(pvals)
}

nichols.test <- function(pvals) {
    max(pvals)
}

tippet.test <- function(pvals) {
  s <- min(pvals)
  1 - (1-s)**length(pvals)
}

binom.test.cmc <- function(binvals) {
  binom.test(sum(binvals), 
             n=length(binvals),
             alternative = "less")$p.value
}

And now we can aggregate the data by task, using the functions above.

group_data <- granger_data %>%
  group_by(Task, To, From) %>%
  summarize(p.nichols = nichols.test(p),
            p.friston = friston.test(p),
            Plink = binom.test.cmc(Link),
            Sign = sum(2*Link - 1),
            Weight = mean(Estimate)) %>%
  mutate(Link = if_else(Plink > 0.95, 1, 0),
         FristonLink = if_else(p.friston < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'To'. You can override using the `.groups` argument.

Task-level Architectures

Here are the task-level functional architectures.

ggplot(group_data,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Task) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle(paste(task, "Architecture")) +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander() 

Now, let’s calculate an architecture across all tasks. We will use majority rule: If a connection apperas in at least half of the tasks, it will be considered part of the architecture.

architecture <- group_data %>%
  group_by(To, From) %>%
  summarise(Link = if_else(sum(Link) >= 2, 1, 0)) %>%
  add_column(Connectivity = "Empirical")
## `summarise()` has grouped output by 'To'. You can override using the `.groups` argument.

And now we can compare it to the CMC theory.

cmc <- read_tsv("cmc_theory.txt",
                col_types = cols(
                  From = col_character(),
                  To = col_character(),
                  Link = col_double()
                ))

cmc <- cmc %>%
  add_column(Connectivity = "CMC Theory")

architecture <- full_join(architecture,
                          cmc)
## Joining, by = c("To", "From", "Link", "Connectivity")
ggplot(architecture,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Connectivity) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Predicted vs. Observed Architecture") +
  coord_equal(ratio = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme_pander()